library(tidyverse)
library(RColorBrewer)
library(plotly)
library(plyr)
library(gt)
library(stringr)
library(stringi)
# mapquestr is installed from GitHub. Install it only when it is missing, and
# do so *before* attaching it, instead of re-installing (and hitting the
# network) on every run after the package has already been loaded.
if (!requireNamespace("mapquestr", quietly = TRUE)) {
  remotes::install_github("chiouey/mapquestr")
}
library(mapquestr)
library(leaflet)

# Load the mentor/mentee matching form export and preview the first rows.
dataset <- readr::read_csv("SGSF_Mentor_Mentee_Matching_For2022-10-27_13_05_29.csv")
head(dataset)
## # A tibble: 6 x 29
## `Submission Date` `First Name` `Last Name` `Home Address` `The name of t~`
## <chr> <chr> <chr> <chr> <chr>
## 1 27-Oct-22 Yaquelin Rodriguez <NA> University of H~
## 2 23-Oct-22 Jennifer Li <NA> Rice University
## 3 18-Oct-22 Joauna Carter 18003 Oak Cottage~ Harmony School ~
## 4 17-Oct-22 Nadiya Kabugu 13130 Fry Rd. Cyp~ Smith Middle Sc~
## 5 13-Oct-22 breida ulibarri <NA> <NA>
## 6 13-Oct-22 Daniella Garcia 7214 Calais Rd. H~ Kipp Sunnyside
## # ... with 24 more variables: `Date of Birth` <chr>, `Race or Ethnicity` <chr>,
## # Age <dbl>, `Your Current Grade` <chr>,
## # `After school, do you have reliable transportation, if you need to meet your mentor?` <chr>,
## # `Language Proficiencies. List all.` <chr>,
## # `Your Hobbies and Interests` <chr>, `What Best Describes you` <chr>,
## # `Your classification.` <chr>,
## # `If in college, your major/minor and any certifications.` <chr>, ...
The column names need to be fixed. The new column names are assigned below.
# Keep columns 4 through 23 of the raw export and replace the long,
# question-style headers with short names. Columns 1, 3, 4, 5 and 15 of the
# subset keep their original names.
data <- dataset[, 4:23]

# Map "position in `data`" -> "new column name".
short_names <- c(
  `2`  = "School Name",
  `6`  = "Current Grade",
  `7`  = "Reliable Transportation",
  `8`  = "Language Proficiencies",
  `9`  = "Hobbies and Interests",
  `10` = "Personal Description",
  `11` = "Classification",
  `12` = "College Major/Minor/Certification",
  `13` = "Email",
  `14` = "Cell Number",
  `16` = "Desired mentor role",
  `17` = "Desired mentor impact area",
  `18` = "Previous mentorship",
  `19` = "Communication with Mentors preferences",
  # The leading space is part of the name; later code refers to it verbatim.
  `20` = " Mentors genders preferences"
)
names(data)[as.integer(names(short_names))] <- unname(short_names)

# Show the resulting column names.
colnames(data)
## [1] "Home Address"
## [2] "School Name"
## [3] "Date of Birth"
## [4] "Race or Ethnicity"
## [5] "Age"
## [6] "Current Grade"
## [7] "Reliable Transportation"
## [8] "Language Proficiencies"
## [9] "Hobbies and Interests"
## [10] "Personal Description"
## [11] "Classification"
## [12] "College Major/Minor/Certification"
## [13] "Email"
## [14] "Cell Number"
## [15] "Professional Interest or Expertise"
## [16] "Desired mentor role"
## [17] "Desired mentor impact area"
## [18] "Previous mentorship"
## [19] "Communication with Mentors preferences"
## [20] " Mentors genders preferences"
# Per-column overview: length/class for character columns, quartiles for Age.
summary(data)
## Home Address School Name Date of Birth Race or Ethnicity
## Length:18 Length:18 Length:18 Length:18
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Age Current Grade Reliable Transportation
## Min. :12.00 Length:18 Length:18
## 1st Qu.:17.00 Class :character Class :character
## Median :17.00 Mode :character Mode :character
## Mean :17.11
## 3rd Qu.:17.00
## Max. :23.00
## Language Proficiencies Hobbies and Interests Personal Description
## Length:18 Length:18 Length:18
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## Classification College Major/Minor/Certification Email
## Length:18 Length:18 Length:18
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## Cell Number Professional Interest or Expertise Desired mentor role
## Length:18 Length:18 Length:18
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## Desired mentor impact area Previous mentorship
## Length:18 Length:18
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## Communication with Mentors preferences Mentors genders preferences
## Length:18 Length:18
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# Total number of missing cells across the whole table.
sum(is.na(data))
## [1] 71
# Turn NaN into NA. Only numeric columns can hold NaN, so the replacement is
# limited to them; this avoids ifelse() rebuilding every other column.
# across() replaces the superseded mutate_all(). dplyr:: is spelled out because
# plyr is attached after the tidyverse and masks mutate().
data <- data %>%
  dplyr::mutate(dplyr::across(where(is.numeric), ~ replace(.x, is.nan(.x), NA)))
library(tidyverse)
library(plyr)
library(RColorBrewer)
library(plotly)
# Print the raw answers to spot inconsistent spellings before counting.
data$`Race or Ethnicity`
## [1] "Hispanic" "Asian" "Black" "African American"
## [5] "hispanic" "Hispanic" "Hispanic" "African American"
## [9] "Hispanic" "Hispanic" "Asian" "South Indian"
## [13] "hispanic" "Hispanic" "Houston" "Hispanic"
## [17] "Hispanic" "Hispanic"
# Fold the lower-case spelling into "Hispanic" so both count as one category.
race_answers <- data$`Race or Ethnicity`
data$`Race or Ethnicity` <- replace(
  race_answers,
  which(race_answers == "hispanic"),
  "Hispanic"
)

# Frequency table (plyr::count names the value column `x`); relabel it.
counts <- count(data$`Race or Ethnicity`)
names(counts)[names(counts) == "x"] <- "Race or Ethnicity"
counts
## Race or Ethnicity freq
## 1 African American 2
## 2 Asian 2
## 3 Black 1
## 4 Hispanic 11
## 5 Houston 1
## 6 South Indian 1
# Race/ethnicity bar chart: one bar per category, ordered by frequency, shown
# both as a static ggplot and as an interactive plotly widget.
colourCount <- length(unique(counts$`Race or Ethnicity`))
getPalette <- colorRampPalette(brewer.pal(colourCount, "Paired"))
p1 <- ggplot(counts, aes(x = reorder(`Race or Ethnicity`, +freq), y = freq,
                         fill = `Race or Ethnicity`,
                         # `text` feeds the ggplotly() hover tooltip below.
                         text = paste("Race or Ethnicity:", `Race or Ethnicity`,
                                      "<br>Count:", freq
                         ))) +
  geom_bar(stat = 'identity', width = 0.6) +
  scale_y_continuous(breaks = c(0, 2, 4, 6, 8, 10)) +
  ggtitle("Race or Ethnicity distribution") +
  theme_classic() +
  theme(legend.position = "none") +
  labs(y = "Counts", x = "Race or Ethnicity", caption = "Source: Mentor Mentee match datasets") +
  theme(title = element_text(size = 9, face = "bold"),
        plot.title = element_text(hjust = 0.5),
        axis.title.x = element_text(size = 8, face = "bold"),
        axis.title.y = element_text(size = 8, face = "bold"),
        legend.position = "none",
        panel.grid.minor = element_blank()) +
  geom_text(aes(label = signif(freq)), position = position_dodge(0.9), vjust = -1, size = 3)
# Store the manual palette on p1 itself. It used to be added only to a
# throw-away copy that was printed, so ggplotly(p1) was drawn without it.
p1 <- p1 + scale_fill_manual(values = getPalette(colourCount))
p1
ggplotly(p1, tooltip = "text")
For the School Name variable, we list the schools, colleges and universities that the SUPERGirls attend, and then count the number of SUPERGirls at each school.
# Print the raw school names as entered by the respondents.
data$`School Name`
## [1] "University of Houston"
## [2] "Rice University"
## [3] "Harmony School of Innovation - Katy (High)"
## [4] "Smith Middle School"
## [5] NA
## [6] "Kipp Sunnyside"
## [7] "Kipp Sunnnyside"
## [8] "Kipp Sunnyside"
## [9] "kipp sunnyside"
## [10] NA
## [11] "Kipp Sunnyside"
## [12] NA
## [13] "Kipp Sunnyside"
## [14] "KIPP Sunnyside"
## [15] "Kipp Sunnyside"
## [16] "Kipp Sunnyside"
## [17] NA
## [18] "Kipp sunnyside"
As we can see, “Kipp Sunnyside” was written by the girls in several different ways. We need a single spelling so that all of these answers fall under the same category when we count them.
library(tidyverse)
library(plyr)
library(RColorBrewer)
library(plotly)
# Print the raw school names again, right before normalising the spellings.
data$`School Name`
## [1] "University of Houston"
## [2] "Rice University"
## [3] "Harmony School of Innovation - Katy (High)"
## [4] "Smith Middle School"
## [5] NA
## [6] "Kipp Sunnyside"
## [7] "Kipp Sunnnyside"
## [8] "Kipp Sunnyside"
## [9] "kipp sunnyside"
## [10] NA
## [11] "Kipp Sunnyside"
## [12] NA
## [13] "Kipp Sunnyside"
## [14] "KIPP Sunnyside"
## [15] "Kipp Sunnyside"
## [16] "Kipp Sunnyside"
## [17] NA
## [18] "Kipp sunnyside"
# Collapse the case variants and the "Sunnnyside" typo into one canonical
# spelling so they are counted as a single school. One %in% test replaces the
# chain of `==` assignments: it never yields NA for the missing school names,
# and the former no-op (replacing "Kipp Sunnyside" with itself) is dropped.
kipp_variants <- c("KIPP Sunnyside", "kipp sunnyside", "Kipp sunnyside", "Kipp Sunnnyside")
data$`School Name`[data$`School Name` %in% kipp_variants] <- "Kipp Sunnyside"

# Frequency table of schools; missing answers appear as their own <NA> row.
counts <- count(data$`School Name`)
names(counts)[1] <- "School Name"
counts
## School Name freq
## 1 Harmony School of Innovation - Katy (High) 1
## 2 Kipp Sunnyside 10
## 3 Rice University 1
## 4 Smith Middle School 1
## 5 University of Houston 1
## 6 <NA> 4
# Horizontal bar chart of respondents per school, ordered by frequency.
colourCount <- length(unique(counts$`School Name`))
getPalette <- colorRampPalette(brewer.pal(colourCount, "Paired"))
p2 <- ggplot(counts, aes(x = reorder(`School Name`, +freq), y = freq, fill = `School Name`,
                         text = paste("School Name:", `School Name`,
                                      "<br>Count:", freq
                         ))) +
  geom_bar(stat = 'identity', width = 0.6) +
  scale_y_continuous(breaks = c(0, 2, 4, 6, 8, 10)) +
  ggtitle("School Name distribution") +
  theme_classic() +
  theme(legend.position = "none") +
  coord_flip() +
  # labs() titles follow the aesthetics, not the screen position: x is the
  # school and y is the count even after coord_flip(). The two titles were
  # swapped, which labelled the school axis "Counts" and vice versa.
  labs(x = "School Name", y = "Counts", caption = "Source: Mentor Mentee match datasets") +
  theme(title = element_text(size = 9, face = "bold"),
        plot.title = element_text(hjust = 0.5),
        axis.title.x = element_text(size = 8, face = "bold"),
        axis.title.y = element_text(size = 8, face = "bold"),
        legend.position = "none",
        panel.grid.minor = element_blank()) +
  geom_text(aes(label = signif(freq)), position = position_dodge(0.9), vjust = -1, size = 3)
p2 + scale_fill_manual(values = getPalette(colourCount))
# Age distribution: one bar per distinct age, ordered by how often it occurs.
counts1 <- count(data$Age)
names(counts1)[names(counts1) == "x"] <- "Age"

colourCount <- length(unique(counts1$Age))
getPalette <- colorRampPalette(brewer.pal(colourCount, "Set3"))

# Shared text style for both axis titles.
axis_title_style <- element_text(size = 8, face = "bold")
age_theme <- theme_classic() +
  theme(
    title = element_text(size = 10, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.title.x = axis_title_style,
    axis.title.y = axis_title_style,
    legend.position = "none",
    panel.grid.minor = element_blank()
  )

p3 <- ggplot(
  counts1,
  aes(
    x = reorder(Age, freq),
    y = freq,
    fill = factor(Age),
    text = paste("Age:", Age, "<br>Count:", freq)
  )
) +
  geom_bar(stat = "identity", width = 0.6) +
  geom_text(aes(label = signif(freq)), position = position_dodge(0.9), vjust = -1, size = 3) +
  scale_y_continuous(breaks = seq(0, 10, by = 2)) +
  labs(
    title = "Age distribution",
    x = "Age",
    y = "Counts",
    caption = "Source: Mentor Mentee match datasets"
  ) +
  age_theme

# Print the chart with the interpolated Set3 palette applied.
p3 + scale_fill_manual(values = getPalette(colourCount))
In this part, we want to check which grade the majority of the girls are in.
# Grade distribution: one bar per grade, ordered by frequency, with the count
# printed above each bar (the y-axis tick labels themselves are hidden).
counts2 <- count(data$`Current Grade`)
names(counts2)[names(counts2) == "x"] <- "Current Grade"

colourCount <- length(unique(counts2$`Current Grade`))
getPalette <- colorRampPalette(brewer.pal(colourCount, "Set3"))

# Shared text style for both axis titles.
axis_title_style <- element_text(size = 8, face = "bold")
grade_theme <- theme_bw() +
  theme(
    title = element_text(size = 10, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.title.x = axis_title_style,
    axis.title.y = axis_title_style,
    axis.text.y = element_blank(),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

p4 <- ggplot(
  counts2,
  aes(
    x = reorder(`Current Grade`, freq),
    y = freq,
    fill = factor(`Current Grade`),
    text = paste("Current Grade:", `Current Grade`, "<br>Count:", freq)
  )
) +
  geom_bar(stat = "identity", width = 0.6) +
  geom_text(aes(label = signif(freq)), position = position_dodge(0.9), vjust = -1, size = 3) +
  scale_y_continuous(breaks = seq(0, 12, by = 2)) +
  labs(
    title = "Grade distribution",
    x = "Grade",
    y = "Counts",
    caption = "Source: Mentor Mentee match datasets"
  ) +
  grade_theme

# Print the chart with the interpolated Set3 palette applied.
p4 + scale_fill_manual(values = getPalette(colourCount))
# Frequency of each answer to the reliable-transportation question.
counts3 <- count(data$`Reliable Transportation`)
names(counts3)[1] <- 'Reliable Transportation'
counts3
## Reliable Transportation freq
## 1 Possibly 6
## 2 Yes 12
colourCount <- length(unique(counts3$`Reliable Transportation`))
# brewer.pal() only accepts 3 to 12 colours for "Paired" and warns outside that
# range (there are just two answers here). Request a size within the range and
# let colorRampPalette() interpolate down/up to the number actually needed.
getPalette <- colorRampPalette(brewer.pal(min(max(colourCount, 3), 12), "Paired"))
p5 <- ggplot(counts3, aes(x = reorder(`Reliable Transportation`, +freq), y = freq,
                          fill = factor(`Reliable Transportation`),
                          text = paste("Reliable Transportation:", `Reliable Transportation`,
                                       "<br>Count:", freq
                          ))) +
  geom_bar(stat = 'identity', width = 0.6) +
  scale_y_continuous(breaks = c(0, 2, 4, 6, 8, 10, 12)) +
  ggtitle("Reliable Transportation") +
  theme(legend.position = "none") +
  # Axis title spelling fixed (was "transporation").
  labs(y = "Counts", x = "Reliable transportation", caption = "Source: Mentor Mentee match datasets") +
  theme_bw() +
  theme(title = element_text(size = 10, face = "bold"),
        plot.title = element_text(hjust = 0.5),
        axis.title.x = element_text(size = 8, face = "bold"),
        axis.title.y = element_text(size = 8, face = "bold"),
        axis.text.y = element_blank(),
        legend.position = "none",
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank()) +
  geom_text(aes(label = signif(freq)), vjust = -0.5, size = 3)
p5 + scale_fill_manual(values = getPalette(colourCount))
We don’t have any girl that answered no, which is what we hope for.
# Print the raw language answers to spot inconsistent spellings.
data$`Language Proficiencies`
## [1] "English and Spanish" "English"
## [3] "English" "English"
## [5] "english" "English and Spanish"
## [7] "N/A" "English"
## [9] "English" "Spanish and English"
## [11] "english" "English"
## [13] "english and spanish" "Spanish,"
## [15] "English and a little Spanish" "English , Spanish"
## [17] "Spanish and English" "Spanish and English"
As we can see from the data for this variable, the girls wrote “English” in several different ways. Left as is, each spelling would be counted as its own category, which is not correct. Let's fix those and create a single “English” category, and do the same for the other languages.
# Normalise the spelling variants of the language answers on a working copy,
# then write the cleaned vector back and print it.
language_answers <- data$`Language Proficiencies`

language_answers[language_answers == "english"] <- "English"

bilingual_variants <- c("english and spanish", "English and Spanish", "English , Spanish")
language_answers[language_answers %in% bilingual_variants] <- "Spanish and English"

language_answers[language_answers == "Spanish,"] <- "Spanish"

data$`Language Proficiencies` <- language_answers
data$`Language Proficiencies`
## [1] "Spanish and English" "English"
## [3] "English" "English"
## [5] "English" "Spanish and English"
## [7] "N/A" "English"
## [9] "English" "Spanish and English"
## [11] "English" "English"
## [13] "Spanish and English" "Spanish"
## [15] "English and a little Spanish" "Spanish and English"
## [17] "Spanish and English" "Spanish and English"
Now, our variable is ready for checking the distribution on a bar plot.
# Language proficiencies: frequency table and bar chart ordered by frequency.
counts4 <- count(data$`Language Proficiencies`)
names(counts4)[names(counts4) == "x"] <- "Language Proficiencies"

colourCount <- length(unique(counts4$`Language Proficiencies`))
getPalette <- colorRampPalette(brewer.pal(colourCount, "Paired"))

# Shared text style for both axis titles.
axis_title_style <- element_text(size = 8, face = "bold")
language_theme <- theme_bw() +
  theme(
    title = element_text(size = 10, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.title.x = axis_title_style,
    axis.title.y = axis_title_style,
    axis.text.y = element_blank(),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

p6 <- ggplot(
  counts4,
  aes(
    x = reorder(`Language Proficiencies`, freq),
    y = freq,
    fill = factor(`Language Proficiencies`),
    text = paste("Language Proficiencies:", `Language Proficiencies`, "<br>Count:", freq)
  )
) +
  geom_bar(stat = "identity", width = 0.6) +
  geom_text(aes(label = signif(freq)), vjust = -0.5, size = 3) +
  scale_y_continuous(breaks = seq(0, 12, by = 2)) +
  labs(
    title = "Language Proficiencies",
    x = "Language Proficiencies",
    y = "Counts",
    caption = "Source: Mentor Mentee match datasets"
  ) +
  language_theme

# Print the chart with the interpolated Paired palette applied.
p6 + scale_fill_manual(values = getPalette(colourCount))
library(gt)
# Frequency of each distinct hobbies/interests answer, rendered as a gt table.
data1 <- count(data$`Hobbies and Interests`)

# Cell styles, named so the pipeline below reads as a list of steps.
body_text_style <- cell_text(font = "calibri", align = "center", size = 3)
label_border_style <- cell_borders(sides = "bottom", color = "black", weight = px(3))
label_text_style <- cell_text(font = "Karla", weight = "bold", align = "center", size = 6)

Interests <- gt(as.data.frame(data1)) %>%
  tab_header(title = md("**SUPERGirls Interest**")) %>%
  cols_label(x = "Interests", freq = "Frequency") %>%
  tab_source_note(source_note = md("**Source: Mentor/Mentee Matching Dataset**")) %>%
  # Body cells.
  tab_style(
    style = body_text_style,
    locations = cells_body(columns = c(x, freq))
  ) %>%
  # Heavy rule under the column labels.
  tab_style(
    style = list(label_border_style),
    locations = list(cells_column_labels(columns = gt::everything()))
  ) %>%
  # Column label text.
  tab_style(
    style = list(label_text_style),
    locations = list(cells_column_labels(gt::everything()))
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    heading.border.bottom.color = "black",
    table_body.border.top.color = "black",
    column_labels.border.bottom.color = "white",
    table_body.hlines.color = "white",
    table_body.border.bottom.color = "black",
    row.striping.background_color = "black"
  )

Interests
| SUPERGirls Interest | |
|---|---|
| Interests | Frequency |
| Anything related to cosmetology | 1 |
| Dance | 1 |
| Electronic Games Film Travel Music Volunteering Reading (Fantasy/Fiction) Baking | 1 |
| Electronic Games Music Volunteering Reading (Fantasy/Fiction) Literary Works Puzzles | 1 |
| Music | 1 |
| Music Reading (Fantasy/Fiction) Sports | 1 |
| Music Sports Dance | 1 |
| Music Sports Space, engineering, anything STEM related | 1 |
| Sports | 1 |
| Travel | 1 |
| Visual Arts Electronic Games Film Travel Music Volunteering Reading (Fantasy/Fiction) Literary Works | 1 |
| Visual Arts Electronic Games Music Politics Reading (Fantasy/Fiction) Literary Works | 1 |
| Visual Arts Electronic Games Music Volunteering | 1 |
| Visual Arts Music Volunteering | 1 |
| Visual Arts Volunteering | 1 |
| Volunteering Reading (Fantasy/Fiction) | 1 |
| Volunteering Sports | 1 |
| NA | 1 |
| Source: Mentor/Mentee Matching Dataset | |
We can see that no two girls have exactly the same set of interests, but many girls do share some interests.
Again, we will present the girls' personal descriptions in a gt table.
# Frequency of each distinct personal-description answer, rendered as a gt table.
data2 <- count(data$`Personal Description`)

# Cell styles, named so the pipeline below reads as a list of steps.
body_text_style <- cell_text(font = "calibri", align = "center")
label_border_style <- cell_borders(sides = "bottom", color = "black", weight = px(3))
label_text_style <- cell_text(font = "Karla", weight = "bold", align = "center")

Personal_Description <- gt(as.data.frame(data2)) %>%
  tab_header(title = md("**SUPERGirls Personal Description**")) %>%
  cols_label(x = "Personal Description", freq = "Frequency") %>%
  tab_source_note(source_note = md("**Source: Mentor/Mentee Matching Dataset**")) %>%
  # Body cells.
  tab_style(
    style = body_text_style,
    locations = cells_body(columns = c(x, freq))
  ) %>%
  # Heavy rule under the column labels.
  tab_style(
    style = list(label_border_style),
    locations = list(cells_column_labels(columns = gt::everything()))
  ) %>%
  # Column label text.
  tab_style(
    style = list(label_text_style),
    locations = list(cells_column_labels(gt::everything()))
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    heading.border.bottom.color = "black",
    table_body.border.top.color = "black",
    column_labels.border.bottom.color = "white",
    table_body.hlines.color = "white",
    table_body.border.bottom.color = "black",
    row.striping.background_color = "black"
  )

Personal_Description
| SUPERGirls Personal Description | |
|---|---|
| Personal Description | Frequency |
| Business-oriented Constructive Empathetic Funny Lively Intuitive Passionate Reserved Reflective Vibrant | 1 |
| Business-oriented Constructive Empathetic Lively Passionate Reserved | 1 |
| Business-oriented Empathetic Passionate Reserved | 1 |
| Constructive Empathetic Intuitive Reserved Reflective Driven | 1 |
| Constructive Funny Intuitive Passionate Vibrant | 1 |
| Empathetic Funny Lively Intuitive Passionate Reflective Vibrant Can be reserved at times | 1 |
| Empathetic Lively Intuitive Passionate | 1 |
| Empathetic Lively Passionate | 1 |
| Funny Lively Reserved Reflective | 1 |
| Funny Passionate | 1 |
| Outgoing | 1 |
| Outgoing Business-oriented Lively Passionate Vibrant Unique | 1 |
| Outgoing Constructive Empathetic Intuitive Passionate Reserved Reflective Vibrant | 1 |
| Outgoing Constructive Passionate Reserved | 1 |
| Outgoing Funny Passionate | 1 |
| Outgoing Funny Passionate Reserved Reflective Vibrant | 1 |
| Outgoing Funny Shy | 1 |
| Reserved Reflective Vibrant | 1 |
| Source: Mentor/Mentee Matching Dataset | |
# Frequency of each distinct (whole-answer) professional interest string,
# drawn as a horizontal bar chart ordered by frequency.
counts5 <- count(data$`Professional Interest or Expertise`) %>% as.data.frame()
names(counts5)[1] <- 'Professional Interest or Expertise'
colourCount <- length(unique(counts5$`Professional Interest or Expertise`))
# brewer.pal() only accepts 3 to 12 colours for "Paired" and warns outside that
# range. The number of distinct answers is not bounded (presumably more than 12
# here -- TODO confirm), so request a size within the range and let
# colorRampPalette() interpolate to the number actually needed.
getPalette <- colorRampPalette(brewer.pal(min(max(colourCount, 3), 12), "Paired"))
p7 <- ggplot(counts5, aes(x = reorder(`Professional Interest or Expertise`, +freq), y = freq,
                          fill = factor(`Professional Interest or Expertise`))) +
  geom_bar(stat = 'identity', width = 0.6) +
  ggtitle("SUPERGirls Professional Interest or Expertise") +
  theme(legend.position = "none") +
  labs(y = "Counts", x = "Professional Interest or Expertise", caption = "Source: Mentor Mentee match datasets") +
  # Stagger the long category labels over two columns.
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  theme_bw() +
  coord_flip() +
  theme(title = element_text(size = 10, face = "bold"),
        plot.title = element_text(hjust = 0.5),
        axis.title.x = element_text(size = 8, face = "bold"),
        axis.title.y = element_text(size = 8, face = "bold"),
        axis.text.y = element_text(size = 5),
        legend.position = "none",
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank()) +
  geom_text(aes(label = signif(freq)), vjust = -0.5, size = 3)
p7 + scale_fill_manual(values = getPalette(colourCount))
# An answer can list several interests, so count how many times each interest
# is mentioned across all answers instead of counting whole answer strings.
# "Financial Services" used to be listed twice, which produced a duplicate row;
# every interest now appears exactly once.
interest_labels <- c(
  "Arts and Entertainment", "Business and Development", "Education",
  "Cyber security", "Computer Science", "Engineering", "Financial Services",
  "Marketing and Advertising", "Science and Math", "Science and Technology",
  "Information Technology", "Health Care and Medicine", "Ultrasound technician",
  "Environmental Science", "Biomedical Sciences", "Aerospace,STEM"
)
# Text searched for each label. Identical to the label, except that the last
# entry is matched on "Aerospace" alone (as before).
interest_terms <- replace(interest_labels, interest_labels == "Aerospace,STEM", "Aerospace")

# Number of whole-word occurrences of `term` over all answers.
count_interest_mentions <- function(term) {
  hits <- str_count(
    data$`Professional Interest or Expertise`,
    regex(paste0("\\b", term, "\\b"), uword = TRUE)
  )
  # A skipped question gives NA; ignore it instead of turning the total into NA.
  sum(hits, na.rm = TRUE)
}

Professional_Interest <- data.frame(
  Interest = interest_labels,
  counting = vapply(interest_terms, count_interest_mentions, integer(1), USE.NAMES = FALSE)
)
Professional_Interest
## Interest counting
## 1 Arts and Entertainment 3
## 2 Business and Development 5
## 3 Education 1
## 4 Cyber security 4
## 5 Computer Science 4
## 6 Engineering 6
## 7 Financial Services 1
## 8 Marketing and Advertising 2
## 9 Science and Math 1
## 10 Science and Technology 5
## 11 Information Technology 1
## 12 Health Care and Medicine 12
## 13 Ultrasound technician 1
## 14 Environmental Science 1
## 15 Biomedical Sciences 1
## 16 Aerospace,STEM 1
Now, we can visualize this variable.
# Keep the five most frequently mentioned interests and chart them.
rows_by_count <- order(Professional_Interest$counting, decreasing = TRUE)
Professional_Interest <- head(Professional_Interest[rows_by_count, ], 5)

colourCount <- 5
getPalette <- colorRampPalette(brewer.pal(colourCount, "Set3"))

# Shared text style for both axis titles.
axis_title_style <- element_text(size = 8, face = "bold")
top_interest_theme <- theme_bw() +
  theme(
    title = element_text(size = 10, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.title.x = axis_title_style,
    axis.title.y = axis_title_style,
    axis.text.y = element_text(size = 5),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

p8 <- ggplot(
  Professional_Interest,
  aes(x = reorder(Interest, counting), y = counting, fill = factor(Interest))
) +
  geom_bar(stat = "identity", width = 0.4) +
  geom_text(aes(label = signif(counting)), vjust = -0.5, size = 3) +
  scale_y_continuous(limits = c(0, 15)) +
  coord_flip() +
  labs(
    title = "SUPERGirls Professional Interest",
    x = "Interest",
    y = "Counts",
    caption = "Source: Mentor Mentee match datasets"
  ) +
  top_interest_theme

# Print the chart with the Set3 palette applied.
p8 + scale_fill_manual(values = getPalette(colourCount))
For this variable, we will do the same, we need to breakdown and get an overview of the desired mentor role that girls prefer, in order to choose the right mentors for them.
# Frequency of each distinct (whole-answer) desired-mentor-role string.
counts6 <- as.data.frame(count(data$`Desired mentor role`))
names(counts6)[names(counts6) == "x"] <- "Desired mentor role"
counts6
## Desired mentor role
## 1 Ally\nCareer Development\nListener\nLife Coach
## 2 Ally\nCareer Development\nListener\nTeacher
## 3 Ally\nColleague\nCareer Development\nIndustry Coach
## 4 Ally\nColleague\nCareer Development\nListener\nMotivator
## 5 Ally\nColleague\nCareer Development\nNurturer\nListener\nLife Coach\nIndustry Coach\nTeacher\nMotivator
## 6 Ally\nListener\nLife Coach\nTeacher\nMotivator
## 7 Career Development
## 8 Career Development\nListener\nTeacher\nMotivator
## 9 Colleague\nCareer Development\nLife Coach
## 10 Colleague\nCareer Development\nLife Coach\nIndustry Coach\nTeacher\nMotivator
## 11 Colleague\nCareer Development\nListener\nLife Coach\nMotivator
## 12 Colleague\nCareer Development\nListener\nLife Coach\nTeacher\nMotivator
## 13 Colleague\nListener\nLife Coach\nIndustry Coach\nMotivator
## 14 Listener\nLife Coach\nMotivator
## freq
## 1 1
## 2 1
## 3 1
## 4 1
## 5 2
## 6 1
## 7 2
## 8 2
## 9 1
## 10 2
## 11 1
## 12 1
## 13 1
## 14 1
# Desired mentor role variable plot (whole-answer combinations)
colourCount <- length(unique(counts6$`Desired mentor role`))
# brewer.pal() only accepts 3 to 12 colours for "Paired" and warns when asked
# for more (there are 14 distinct answers here). Request a size within the
# range and let colorRampPalette() interpolate to the number actually needed.
getPalette <- colorRampPalette(brewer.pal(min(max(colourCount, 3), 12), "Paired"))
p9 <- ggplot(counts6, aes(x = reorder(`Desired mentor role`, +freq), y = freq,
                          fill = factor(`Desired mentor role`))) +
  geom_bar(stat = 'identity', width = 0.6) +
  scale_y_continuous(breaks = c(1, 2)) +
  ggtitle("SUPERGirls desired mentor role") +
  theme(legend.position = "none") +
  labs(y = "Counts", x = "Desired mentor role", caption = "Source: Mentor Mentee match datasets") +
  # Stagger the long category labels over two columns.
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  theme_bw() +
  coord_flip() +
  theme(title = element_text(size = 10, face = "bold"),
        plot.title = element_text(hjust = 0.5),
        axis.title.x = element_text(size = 8, face = "bold"),
        axis.title.y = element_text(size = 8, face = "bold"),
        axis.text.y = element_text(size = 5),
        legend.position = "none",
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank()) +
  geom_text(aes(label = signif(freq)), vjust = -0.5, size = 3)
p9 + scale_fill_manual(values = getPalette(colourCount))
# An answer can list several roles (one per line), so count how many times
# each individual role is mentioned across all answers.
mentor_roles <- c(
  "Colleague", "Listener", "Life Coach", "Teacher", "Industry Coach",
  "Motivator", "Ally", "Nurturer", "Career Development"
)

# Number of whole-word occurrences of `role` over all answers.
count_role_mentions <- function(role) {
  hits <- str_count(
    data$`Desired mentor role`,
    regex(paste0("\\b", role, "\\b"), uword = TRUE)
  )
  # A skipped question gives NA; ignore it instead of turning the total into NA.
  sum(hits, na.rm = TRUE)
}

Mentor_role <- data.frame(
  role = mentor_roles,
  counting = vapply(mentor_roles, count_role_mentions, integer(1), USE.NAMES = FALSE)
)
# Bar chart of how often each individual mentor role was requested.
colourCount <- length(unique(Mentor_role$role))
getPalette <- colorRampPalette(brewer.pal(colourCount, "Paired"))

# Shared text style for both axis titles.
axis_title_style <- element_text(size = 8, face = "bold")
role_theme <- theme_bw() +
  theme(
    title = element_text(size = 10, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.title.x = axis_title_style,
    axis.title.y = axis_title_style,
    axis.text.y = element_text(size = 5),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

p10 <- ggplot(
  Mentor_role,
  aes(x = reorder(role, counting), y = counting, fill = factor(role))
) +
  geom_bar(stat = "identity", width = 0.4) +
  geom_text(aes(label = signif(counting)), vjust = -0.5, size = 3) +
  scale_y_continuous(breaks = seq(0, 10, by = 5)) +
  labs(
    title = "SUPERGirls desired mentor role",
    x = "Mentor role",
    y = "Counts",
    caption = "Source: Mentor Mentee match datasets"
  ) +
  role_theme

# Print the chart with the Paired palette applied.
p10 + scale_fill_manual(values = getPalette(colourCount))
# Mentor gender preference: frequency table and bar chart ordered by frequency.
# Both the source column name (leading space) and the label given to the count
# column (trailing space) contain significant whitespace.
counts8 <- as.data.frame(count(data$` Mentors genders preferences`))
names(counts8)[1] <- "Mentors genders "

colourCount <- length(unique(counts8$`Mentors genders `))
getPalette <- colorRampPalette(brewer.pal(colourCount, "Set3"))

# Shared text style for both axis titles.
axis_title_style <- element_text(size = 8, face = "bold")
gender_theme <- theme_classic() +
  theme(
    title = element_text(size = 10, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.title.x = axis_title_style,
    axis.title.y = axis_title_style,
    legend.position = "none",
    panel.grid.minor = element_blank()
  )

p11 <- ggplot(
  counts8,
  aes(
    x = reorder(`Mentors genders `, freq),
    y = freq,
    fill = `Mentors genders `,
    text = paste("Mentors gender preference:", `Mentors genders `, "<br>Count:", freq)
  )
) +
  geom_bar(stat = "identity", width = 0.6) +
  scale_y_continuous(breaks = seq(0, 10, by = 2)) +
  labs(
    title = "Mentors genders preferences",
    x = "Mentors genders preferences",
    y = "Counts",
    caption = "Source: Mentor Mentee match datasets"
  ) +
  gender_theme

# Print the chart with the Set3 palette applied.
p11 + scale_fill_manual(values = getPalette(colourCount))
The SGSF wants to check whether any girl has played a mentorship role before. The graph below shows that no girl has had this role before.
# Previous mentorship: frequency table and bar chart ordered by frequency.
counts9 <- count(data$`Previous mentorship`) %>% as.data.frame()
names(counts9)[1] <- 'Previous mentorship'
# Size the palette from the table that is actually plotted, like the other charts.
colourCount <- length(unique(counts9$`Previous mentorship`))
# brewer.pal() only accepts 3 to 12 colours for "Set3" and warns outside that
# range; this question presumably has fewer than three distinct answers (TODO
# confirm). Request a size within the range and let colorRampPalette()
# interpolate to the number actually needed.
getPalette <- colorRampPalette(brewer.pal(min(max(colourCount, 3), 12), "Set3"))
p12 <- ggplot(counts9, aes(x = reorder(`Previous mentorship`, +freq), y = freq,
                           fill = `Previous mentorship`,
                           text = paste("Previous mentorship:", `Previous mentorship`,
                                        "<br>Count:", freq
                           ))) +
  geom_bar(stat = 'identity', width = 0.6) +
  ggtitle("Previous Mentorship") +
  theme_classic() +
  theme(legend.position = "none") +
  # The x-axis title used to read "Mentors genders preferences", a leftover
  # from the previous chart.
  labs(y = "Counts", x = "Previous mentorship", caption = "Source: Mentor Mentee match datasets") +
  theme(title = element_text(size = 10, face = "bold"),
        plot.title = element_text(hjust = 0.5),
        axis.title.x = element_text(size = 8, face = "bold"),
        axis.title.y = element_text(size = 8, face = "bold"),
        legend.position = "none",
        panel.grid.minor = element_blank())
p12 + scale_fill_manual(values = getPalette(colourCount))
# Count how many times each communication tool is mentioned across all
# answers. Matching is whole-word and case-sensitive, exactly as spelled here.
communication_tools <- c("email", "text", "phone", "personal", "Teams", "chat")

# Number of whole-word occurrences of `tool` over all answers.
count_tool_mentions <- function(tool) {
  hits <- str_count(
    data$`Communication with Mentors preferences`,
    regex(paste0("\\b", tool, "\\b"), uword = TRUE)
  )
  # A skipped question gives NA; ignore it instead of turning the total into NA.
  sum(hits, na.rm = TRUE)
}

Communication_with_Mentors_preferences <- data.frame(
  Tool = communication_tools,
  counting = vapply(communication_tools, count_tool_mentions, integer(1), USE.NAMES = FALSE)
)
# Bar chart of how often each communication tool was mentioned.
colourCount <- length(unique(Communication_with_Mentors_preferences$Tool))
getPalette <- colorRampPalette(brewer.pal(colourCount, "Paired"))

# Shared text style for both axis titles.
axis_title_style <- element_text(size = 8, face = "bold")
communication_theme <- theme_bw() +
  theme(
    title = element_text(size = 10, face = "bold"),
    plot.title = element_text(hjust = 0.5),
    axis.title.x = axis_title_style,
    axis.title.y = axis_title_style,
    axis.text.y = element_text(size = 5),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )

p13 <- ggplot(
  Communication_with_Mentors_preferences,
  aes(x = reorder(Tool, counting), y = counting, fill = factor(Tool))
) +
  geom_bar(stat = "identity", width = 0.4) +
  geom_text(aes(label = signif(counting)), vjust = -0.5, size = 3) +
  scale_y_continuous(breaks = seq(0, 10, by = 5)) +
  labs(
    title = "SUPERGirls desired mentor communication",
    x = "Communication Form",
    y = "Counts",
    caption = "Source: Mentor Mentee match datasets"
  ) +
  communication_theme

# Print the chart with the Paired palette applied.
p13 + scale_fill_manual(values = getPalette(colourCount))
In this part, we want to show how to extract the ZIP codes from the home addresses and add them to the dataset as a separate column, using the readLines and stri_extract_last_regex functions and the stringi library.
To install the geocoding package, first run install.packages("remotes") and then remotes::install_github("chiouey/mapquestr").
# Street addresses to geocode and place on the map.
locations <- c('18003 Oak Cottage Court, Richmond, TX 77407, United States',
               '13130 Fry Rd, Cypress, TX 77433, United States',
               '7214 Calais Rd,Houston, TX 77033, United States',
               '2846 Tidewater Dr,Houston, TX 77045, United States',
               '5934 Berkridge Houston, TX 77053, United States',
               '8008 Lawler St, Houston, TX 77051, United States',
               '3819 knotty Oaks Trl Houston, TX 77045, United States',
               '5722 Southwind St, Houston, TX 77033, United States',
               '5802 Southlea St, Houston Tx 77033, United States',
               '5926 Ludington Dr, Houston, TX 77035, United States',
               '5635 Southbrook Dr, Houston, TX 77033, United States',
               '4344 Idaho St, Houston, TX 77021, United States',
               '11318 Gladewater Dr, Pearland, TX 77584, United States',
               '4314 White River Dr, Pasadena, TX 77504, United States',
               '124 Palmyra St, Houston, Tx 77022, United States',
               '2345 Sage Rd, Houston, TX 77065, United States')

# Prefer a MapQuest key supplied through the MAPQUEST_API_KEY environment
# variable so the credential does not have to live in the source.
# NOTE(review): the literal below is a key committed in plain text. It is kept
# only as a fallback so the script keeps running unchanged; it should be
# rotated and removed from the file.
mapquest_key <- Sys.getenv("MAPQUEST_API_KEY")
if (!nzchar(mapquest_key)) {
  mapquest_key <- "IJ3cr7YdAxOu9AN9De4FU42WNSVXYPAQ"
}

# Geocode the addresses; the result's `lon`/`lat` columns are used below.
geocode <- geocode_mapquest(locations, key = mapquest_key) %>% as.data.frame()

# Interactive map with one marker per geocoded address.
m <- leaflet() %>%
  addTiles() %>% # Add default OpenStreetMap map tiles
  addMarkers(lng = geocode$lon, lat = geocode$lat)
m